* generate variables and intermediate MCVL datasets to be used for main analysis
**** OUTLINE ****
* prelims
* birthdate
* spell dates and gaps
* identify overlaps
* eliminate overlaps 
* UI entitlement
* identify 'fijos discontinuos'			
* industry
* pensions
* cohabitants
* unemployment rates
* closing
********************************************************************************
* prelims																	   
********************************************************************************
{
* reset the session: close any open log, empty memory, disable output paging
capture log close
clear all
set more off
}
********************************************************************************
* birthdate
********************************************************************************
{
* person file: only the identifier and the yearly birth fields are needed
use "$data/mcvl2004-17_01pers_rest_age_un.dta", clear
keep id birth*

* start from the 2017 value and fill blanks with the most recent non-empty
* earlier value, walking back from 2016 to 2005
rename birth2017 birth
forvalues wave = 2016(-1)2005 {
	replace birth = birth`wave' if birth == "" & birth`wave' != ""
	drop birth`wave'
}

* split the string into numeric year (characters 1-4) and month (characters 5-6)
tostring birth, replace
gen str4 byear = substr(birth, 1, 4)
gen str2 bmonth = substr(birth, 5, 2)
destring byear bmonth, replace

* no day is read from the source: the date is placed on day 31, 30 or 28
* depending on the month (February is always 28)
gen lastday = 28 if bmonth == 2
replace lastday = 30 if inlist(bmonth, 4, 6, 9, 11)
replace lastday = 31 if inlist(bmonth, 1, 3, 5, 7, 8, 10, 12)
gen bday = mdy(bmonth, lastday, byear)
format bday %td
la var bday "birth date"

* store
keep id bday
save "$data/int/birthdate.dta", replace

}
********************************************************************************
* spell dates and gaps
********************************************************************************
{
* Builds "$data/temp/spellgaps.dta": one row per distinct (id, affstart, affend)
* with Stata-format start/end dates, the spell length and the gap (in days) to
* the previous spell of the same id.

* load affiliation data
use "$data/mcvl2004-17_02affi_rest_age_un.dta", clear

* keep relevant variables
keep id affstart affend
duplicates drop id affstart affend, force

* spells with affend==0 are not carried into this file
drop if affend == 0

* adjustments: raw YYYYMMDD values that are not valid calendar dates are mapped
* to a valid day so that mdy() below does not return missing
g long t_affstart = affstart
g long t_affend = affend
replace t_affstart = 19640101 if affstart==19640000
replace t_affstart = 19670130 if affstart==19670030
replace t_affstart = 19790228 if affstart==19790229
replace t_affstart = 19890228 if affstart==19890229
replace t_affstart = 19880228 if affstart==19880230
replace t_affstart = 19890228 if affstart==19890230
* fixed: this line used to assign 10830228 (year 1083) instead of 19830228
replace t_affstart = 19830228 if affstart==19830229
replace t_affend = 19830228 if affend==19830229

* generate dates in stata format: split YYYYMMDD by integer arithmetic
gen start = mdy(mod(floor(t_affstart/100), 100), mod(t_affstart, 100), floor(t_affstart/10000))
gen end = mdy(mod(floor(t_affend/100), 100), mod(t_affend, 100), floor(t_affend/10000))
format start %td
format end %td
drop t_affstart t_affend

* spell length (days)
gen length = end - start

* gap to the previous spell of the same id (days). The by-prefix makes
* end[_n-1] missing on the first spell of each id; this replaces the former
* "egen tag(id)" step, which re-sorts on id only and could therefore blank
* the gap of a spell other than the first one.
bysort id (affstart affend): gen gap = start - end[_n-1]

* store
keep id affstart affend length gap start end
sort id affstart affend
compress
save "$data/temp/spellgaps.dta", replace

}
********************************************************************************
* identify overlaps
********************************************************************************
{
* data: affiliation spells with the Stata-format dates and length taken from
* the spell-gaps file; spells of non-positive length are discarded
use "$data/mcvl2004-17_02affi_rest_age_un.dta", clear
keep id affstart affend reltype
merge m:1 id affstart affend using "$data/temp/spellgaps.dta"
drop _merge
drop if length <= 0
duplicates drop id affstart affend reltype, force
gsort id affstart -affend

* number of following / preceding rows each spell is compared with
local N = 70

* flags are created up front, in the order in which they are stored
gen overlapping = .
gen overlapped = .
gen part_overlapping = .
gen part_overlapped = .

* forward comparison with the row i positions below (same id):
*  - overlapping:      that spell lies completely inside the current one
*  - part_overlapping: that spell starts inside the current one and ends later
* the largest i for which the condition holds is what remains stored
forvalues i = 1/`N' {
	replace overlapping = `i' if id == id[_n+`i'] & start <= start[_n+`i'] & end >= end[_n+`i']
	replace part_overlapping = `i' if id == id[_n+`i'] & start <= start[_n+`i'] ///
		& end < end[_n+`i'] & end > start[_n+`i']
}

* backward comparison: a spell is (partially) overlapped when the row i
* positions above (same id) reaches at least i rows forward
forvalues i = 1/`N' {
	replace overlapped = 1 if id == id[_n-`i'] & overlapping[_n-`i'] != . ///
		& overlapping[_n-`i'] >= `i'
	replace part_overlapped = 1 if id == id[_n-`i'] & part_overlapping[_n-`i'] != . ///
		& part_overlapping[_n-`i'] >= `i'
}

* drop every individual having a spell whose overlapping count reached N
bysort id: egen outlier = max(overlapping == `N')
drop if outlier == 1
drop outlier

* turn the overlapped flags into 0/1 indicators
replace overlapped = 0 if missing(overlapped)
replace part_overlapped = 0 if missing(part_overlapped)

* label
label var overlapping "spell overlapping # spells"
label var overlapped "spell overlapped"
label var part_overlapping "spell partially overlapping # spells"
label var part_overlapped "spell partially overlapped"

* store
keep id affstart affend overl* part_over*
duplicates drop id affstart affend, force
gsort id affstart -affend
compress
save "$data/temp/overlaps.dta", replace

}
********************************************************************************
* eliminate overlaps 
********************************************************************************
{

* Removes completely overlapped spells and truncates partially overlapping
* ones, working on one row per (id, affstart, affend).

* prepare data
* only spells present in the overlaps file are kept (_merge == 3)
use "$data/mcvl2004-17_02affi_rest_age_un.dta", clear
keep id affstart affend reasonend reltype
duplicates drop id affstart affend, force
merge m:1 id affstart affend using "$data/temp/spellgaps.dta", keepusing(start end length)
drop _merge
merge m:1 id affstart affend using "$data/temp/overlaps.dta"
keep if _merge == 3
drop _merge

* check overlapped/part-overlapped spells with suspension of contract
* susp = 1 for reltype 752/754, 0 for the other reltype values in 751-756,
* missing for every other reltype
gen susp = 1 if (reltype == 752 | reltype == 754)
replace susp = 0 if (reltype >= 751 & reltype <= 756) & susp == .

* overlapped spell identifier
* t marks the first row of each run of consecutive overlapped rows within an
* id; the running sum numbers the runs, and the number is kept only on
* overlapped rows (missing elsewhere)
gsort id affstart -affend
gen t = 1 if id==id[_n-1] & overlapped==1 & overlapped[_n-1] != 1
replace t = 0 if t != 1
replace t = t + t[_n-1] if _n != 1
replace t = . if overlapped != 1
rename t overlapped_id

* totally overlapped contract suspension spells
* a run is dropped when every row in it has susp == 1 (mean of x equals 1);
* rows with missing overlapped_id have missing x and are never dropped here
gen x = 1 if overlapped == 1 & susp == 1
replace x = 0 if overlapped == 1 & susp != 1
bysort overlapped_id: egen mean_x = mean(x)
gsort id affstart -affend
drop if mean_x == 1

* totally overlapped working spells
* same rule, for runs made only of rows with reltype outside 751-756
drop x mean_x
gen x = 1 if overlapped == 1 & (reltype < 751 | reltype > 756) & reltype != .
replace x = 0 if overlapped == 1 & x == .
bysort overlapped_id: egen mean_x = mean(x)
gsort id affstart -affend
drop if mean_x == 1

* totally overlapped unemp spells
* same rule, for runs made only of rows with susp == 0
drop x mean_x
gen x = 1 if overlapped == 1 & susp == 0
replace x = 0 if overlapped == 1 & x == .
bysort overlapped_id: egen mean_x = mean(x)
gsort id affstart -affend
drop if mean_x == 1

* totally overlapped mixed (work & unemp) spells
* every remaining row that still carries an overlapped_id is dropped, so no
* row with overlapped == 1 survives past this point
drop x mean_x
drop if overlapped_id != .
drop overlapped_id

* first generate new part_overlapped/ing variable
* the stored partial-overlap flags are discarded and recomputed on the reduced
* set of spells; the variable overlapping is kept
drop part_* overlapped
duplicates drop id affstart affend, force

* identify partially overlapping spells
* N = number of following/preceding rows each spell is compared with
local N = 70
* a spell partially overlaps the row i positions below (same id) when that row
* starts on/after its start and before its end, and ends on/after its end
* NOTE(review): this uses end <= end[_n+i], while the "identify overlaps"
* section uses the strict end < end[_n+i] - confirm the difference is intended
gsort id affstart -affend
gen part_overlapping = .
forval i = 1(1)`N' {
replace part_overlapping = `i' if start <= start[_n+`i'] & end <= end[_n+`i'] /*
*/ & end > start[_n+`i'] & id == id[_n+`i']
}
* identify partially overlapped spells
* flagged when the row i positions above (same id) reaches at least i rows forward
gen part_overlapped = .
forval i = 1(1)`N' {
replace part_overlapped = 1 if part_overlapping[_n-`i'] >= `i' & /*
*/ part_overlapping[_n-`i'] != . & id == id[_n-`i']
}
replace part_overlapped = 0 if part_overlapped != 1

* identifier
* numbers the runs of consecutive partially overlapped rows, as done for
* overlapped_id
gsort id affstart -affend
gen t = 1 if id==id[_n-1] & part_overlapped==1 & part_overlapped[_n-1] != 1
replace t = 0 if t != 1
replace t = t + t[_n-1] if _n != 1
replace t = . if part_overlapped != 1
rename t part_overlapped_id

* end date modified
* a partially overlapping spell is cut to end the day before the next row starts
gsort id affstart -affend
replace end = start[_n+1] - 1 if part_overlapping != .
label var end "end date of the spell"

* drop if start or end is missing
drop if (start==. | end==.)

* length modified
* recomputed because end may have been changed
drop length
gen length = end - start
label var length "length of the spell"

* generate gap (days)
* gap = days between the start of a spell and the end of the previous row of
* the same id; missing on the first row of each id. The id comparison replaces
* the former "egen tag(id)" step: egen re-sorts on id only, so it could blank
* the gap of a row other than the first and left the data in arbitrary
* within-id order for the [_n-1] references used below.
gsort id start -end
gen gap = start - end[_n-1] if id == id[_n-1]

* spell modification identifier
*  1: the spell completely overlapped other spells (which were eliminated)
*  3: the spell partially overlapped another one and had its end date cut
*  2: as 3, and the previous row of the same id has the same affstart and
*     still ends after this spell starts
gen modif = 1 if overlapping != .
replace modif = 3 if part_overlapping != .
replace modif = 2 if part_overlapping != . & id == id[_n-1] ///
	& affstart == affstart[_n-1] & end[_n-1] > start
label def modif 1 "overlapped spells eliminated" 2 "start date modified" 3 "end date modified"
label val modif modif
label var modif "modifications to eliminate overlappings"

* store modified spellgaps information
keep id affstart affend start end length gap modif
duplicates drop id affstart affend, force
sort id affstart affend
compress
save "$data/int/spellgaps_nover.dta", replace
* the overlaps file is only an intermediate input of this section
cap erase "$data/temp/overlaps.dta"

}
********************************************************************************
* UI entitlement														   
********************************************************************************
{
* Computes contr6y: the contributed days attached to each spell, capped at
* 2160, measured over a window of 2190 days before the end of the employment
* spell that precedes an unemployment spell.

* data
* spells restricted to those present in the overlap-free spell file
use "$data/mcvl2004-17_02affi_rest_age_un.dta", clear
keep id affstart affend reltype
duplicates drop id affstart affend, force
merge m:1 id affstart affend using "$data/int/spellgaps_nover.dta"
keep if _merge == 3
drop _merge
* unemployed = 1 for reltype 751-756
gen unemployed = (reltype >= 751 & reltype <= 756)

* generate days contributed
* running sum of length over consecutive non-unemployed rows of the same id;
* it restarts after an unemployed row because x is missing there
sort id start
gen x = length if unemployed != 1
replace x = x[_n-1] + length if id == id[_n-1] & x[_n-1] != . & unemployed != 1
rename x dcontrwempl

* gap is blanked where the unemployed status differs from the previous row,
* then accumulated within id
* NOTE(review): dcontrwempl and cumgap are not in the keep list used when this
* section stores its output - confirm whether they are still needed
replace gap = . if unemployed != unemployed[_n-1]
by id: gen cumgap = sum(gap) 
by id: replace cumgap = cond(missing(gap[_n-1]),gap, gap + cumgap[_n-1])

* contribution days in the past six years
* t flags a non-unemployed row followed, within id, by an unemployed row;
* endminus2 is the end of that row minus 2190 days
by id: gen t = (unemployed != 1 & unemployed[_n+1]==1)
by id: gen endminus2 = end - 2190 if t==1
format endminus2 %td
* in reverse chronological order endminus2 is copied from the preceding row in
* that order (i.e. a later spell) wherever it is missing, and dcontrwempl2
* accumulates length over consecutive non-unemployed rows
gsort id -start 
by id: replace endminus2 = endminus2[_n-1] if endminus2==.
gen x = length if unemployed != 1
by id: replace x = x[_n-1] + length if x[_n-1] != . & unemployed != 1
rename x dcontrwempl2
sort id start 

* generate start date for first employment after unemployment spell
* set on a non-unemployed row that follows an unemployed row or opens a new
* id, then carried forward over the following non-unemployed rows
gen lastestartdate = start if unemployed==0 & (unemployed[_n-1]==1 | (id!=id[_n-1]))
format lastestartdate %td
replace lastestartdate=lastestartdate[_n-1] if unemployed==0 & lastestartdate==.
drop t
* t = 1 when the window start is on/after lastestartdate, 0 when it is before
gen t=1 if endminus2>=lastestartdate & endminus2!=.
replace t=0 if endminus2<lastestartdate & lastestartdate!=.

* x = 1: the window start falls inside this spell, or between the previous
*        spell's end and this spell's end (same id)
* x = 2: first row of an id with the window start on/before its end
gen x = (endminus2>=start & endminus2<=end) & t==1 & id==id[_n-1]
replace x=1 if endminus2<end & endminus2>end[_n-1] & x==0 & id==id[_n-1]
replace x = 2 if x==0 & id != id[_n-1] & endminus2<=end & t==1
* for x == 1: days of this spell from the window start to its end, plus the
* accumulated days of the next row when available; otherwise dcontrwempl2
gen t_contr6y = dcontrwempl2[_n+1] + (end - endminus2) if x==1 & t==1 & dcontrwempl2[_n+1]!=.
replace t_contr6y = dcontrwempl2 if x==1 & t==1 & dcontrwempl2[_n+1]==.
replace t_contr6y = dcontrwempl2 if x==2 & t==1
drop x
replace t_contr6y = dcontrwempl2 if t==0 & t[_n-1]!=0
 
* carry the value forward within id, keep it on the employment row preceding
* an unemployment row and copy it onto the unemployed rows that follow
by id: replace t_contr6y = t_contr6y[_n-1] if t_contr6y==.
drop t
by id: gen t = (unemployed==0 & unemployed[_n+1]==1)
gen contr6y = t_contr6y if t==1
by id: replace contr6y = contr6y[_n-1] if unemployed==1
* cap at 2160 days
replace contr6y = 2160 if (contr6y >= 2160 & contr6y!= .) | (length  >= 2160 & length != .  & contr6y != . & unemployed==0)
drop t t_contr6y

* UI entitlements
* ui_gen = days of benefit generated by contr6y contributed days. The current
* schedule is applied first; the older schedules then overwrite it for spells
* selected on the raw YYYYMMDD affend / affstart values, so the last matching
* replace wins. Rows with missing contr6y keep a missing ui_gen.

* Ley 22/1992, de 30 de julio (current system)
gen ui_gen = 0 if inrange(contr6y, 0, 359)
replace ui_gen = 122 if inrange(contr6y, 360, 539)
replace ui_gen = 183 if inrange(contr6y, 540, 719)
replace ui_gen = 244 if inrange(contr6y, 720, 899)
replace ui_gen = 305 if inrange(contr6y, 900, 1079)
replace ui_gen = 365 if inrange(contr6y, 1080, 1259)
replace ui_gen = 426 if inrange(contr6y, 1260, 1439)
replace ui_gen = 487 if inrange(contr6y, 1440, 1619)
replace ui_gen = 548 if inrange(contr6y, 1620, 1799)
replace ui_gen = 609 if inrange(contr6y, 1800, 1979)
replace ui_gen = 670 if inrange(contr6y, 1980, 2159)
replace ui_gen = 731 if contr6y >= 2160 & contr6y < .
label var ui_gen "days of ui generated"

* Ley 31/1984 de 2 de agosto
* the 1260 bracket now ends at 1439: it used to read 1449 and overlap the
* 1440+ bracket, which overwrote it anyway
local law84 "affend <= 19920730"
replace ui_gen = 92 if inrange(contr6y, 180, 359) & `law84'
replace ui_gen = 183 if inrange(contr6y, 360, 539) & `law84'
replace ui_gen = 274 if inrange(contr6y, 540, 719) & `law84'
replace ui_gen = 365 if inrange(contr6y, 720, 899) & `law84'
replace ui_gen = 457 if inrange(contr6y, 900, 1079) & `law84'
replace ui_gen = 548 if inrange(contr6y, 1080, 1259) & `law84'
replace ui_gen = 639 if inrange(contr6y, 1260, 1439) & `law84'
replace ui_gen = 731 if contr6y >= 1440 & contr6y < . & `law84'

* Ley 51/1980, de 8 de octubre
local law80 "affend <= 19840802"
replace ui_gen = 92 if inrange(contr6y, 180, 359) & `law80'
replace ui_gen = 183 if inrange(contr6y, 360, 539) & `law80'
replace ui_gen = 274 if inrange(contr6y, 540, 719) & `law80'
replace ui_gen = 365 if inrange(contr6y, 720, 899) & `law80'
replace ui_gen = 457 if inrange(contr6y, 900, 1079) & `law80'
replace ui_gen = 548 if contr6y >= 1080 & contr6y < . & `law80'

* Ley 62/1961, de 22 de julio
replace ui_gen = 183 if contr6y >= 360 & contr6y < . & affstart >= 19610723 & affend <= 19801008 & affstart != .

* Tracks the entitlement left across successive UI spells and stores the
* spell-level UI file.

* unemployed is redefined here as reltype 751 or 752 only
drop unemployed 
gen unemployed = (reltype == 751 | reltype == 752)
* days spent in such a spell (0 on every other row)
gen length_ui = length if unemployed==1
* temp_uitransition counts, within id, the entries into such a spell so far
by id: gen x = (unemployed == 1 & unemployed[_n-1] != 1)
gen temp_uitransition = 0 
replace temp_uitransition = x + temp_uitransition[_n-1] if id == id[_n-1]
drop x
replace length_ui = 0 if length_ui==.
* starting value: entitlement generated on the row just before the first entry
gen ui_left = ui_gen if temp_uitransition== 0 & temp_uitransition[_n+1] == 1
* x = 1 when the transition count is unchanged from the previous row
gen x=(temp_uitransition[_n-1]==temp_uitransition)
* generate choice variable
* choice = 1 if keeps old ui
* choice = 0 if keeps newly generated ui
g uichoice = .

* ui choice
* one pass per transition number, up to N transitions; t marks the row where
* the count moves from i to i+1
* within a pass: the previous balance is reduced by the days of the spell, the
* choice is set by comparing the previous balance with the entitlement
* generated on the previous row, and the balance is then recomputed from the
* chosen source
local N = 50
forval i=0(1)`N' {
local j=`i'+1
gen t=(temp_uitransition[_n-1]==`i' & temp_uitransition==`j')
replace ui_left = ui_left[_n-1]-length_ui if ui_left[_n-1]!=. & t==1
replace ui_left = ui_left[_n-1]-length_ui if ui_left[_n-1]!=. & x==1
replace uichoice = 0 if ui_left[_n-1]<=ui_gen[_n-1] & ui_gen!=. & t==1
replace uichoice = 1 if ui_left[_n-1]>ui_gen[_n-1] & ui_left!=. & t==1
replace ui_left = ui_gen[_n-1]-length_ui if uichoice==0
replace ui_left = ui_left[_n-1]-length_ui if uichoice==1
drop t
}
drop x

* ui_left at the start of the spell
gen ui_left_start = ui_left + length_ui
* individuals with more than N transitions are dropped
bys id: egen x=max(temp_uitransition)
drop if x>`N'
drop x
* negative balances are set to zero
replace ui_left=0 if ui_left<0
replace ui_left_start=0 if ui_left_start<0
* NOTE(review): the condition below tests reltype[_n-1]==0; confirm that 0 is a
* value reltype actually takes (unemployed[_n-1]==0 may have been intended)
replace ui_left_start=ui_gen[_n-1] if ui_left_start==0 & (reltype>=753&reltype<=756) & reltype[_n-1]==0

* label
label var ui_left "UI entitlement left at the end of the spell"
label var ui_left_start "UI entitlement left at the start of the spell"

* store
keep id affstart affend start end contr6y ui_gen ui_left ui_left_start uichoice
sort id affstart affend
compress
save "$data/int/ui_entit.dta", replace

}
********************************************************************************
* identify 'fijos discontinuos'													   
********************************************************************************
{

* Flags spells with reltype 753/754 that look like 'fijos discontinuos'
* (strategies 1 to 3) and stores the spell-level file used by strategy 4.

* data
use "$data/mcvl2004-17_02affi_rest_age_un.dta", clear
keep id affstart affend ccc reasonend contgroup reltype contract
merge m:1 id using "$data/int/birthdate.dta", keepusing(bday)
drop _merge
merge m:1 id affstart affend using "$data/temp/spellgaps.dta", keepusing(start end length)
drop _merge

* generate age (in years) at the start and at the end of the spell
g age = (start - bday)/365
g age_end = (end - bday)/365
drop start end

* strategies based on instructions from SS
**************
* STRATEGY 1 *
**************
* The working life (MCVL affiliation table) contains affiliation episodes
* before the one recorded with code 753 or 754 whose contract type is that of
* a 'fijo discontinuo' worker (contract codes starting with 3).
* -> category 4 in contract variable
gen fijod = 1 if contract == 4
bysort id: egen ever_fijod = mean(fijod)
drop fijod
format reasonend reltype contract %9.0g
* The spell order is stated explicitly here: the egen call above re-sorts on
* id only, so a plain "bys id:" would read contract[_n-1] from an arbitrary
* spell of the person rather than from the previous one in time.
* NOTE(review): only the immediately preceding spell is checked, while the
* instruction refers to any earlier episode; ever_fijod is not used.
bysort id (affstart affend): gen strat1 = 1 if (reltype == 753 | reltype == 754) & contract[_n-1] == 4
replace strat1 = 0 if (reltype == 753 | reltype == 754) & strat1 == .

**************
* STRATEGY 2 *
**************
* Based on the age of the worker during the 753/754 episode, computed from
* the birth date (persons table) and the spell dates (affiliation table).
* As coded: age below 52; or below 55 for spells starting before 19890401 or
* on/after 20120714; or above 65.
gen strat2 = 1 if (reltype == 753 | reltype == 754) & age < 52
replace strat2 = 1 if (reltype == 753 | reltype == 754) & age < 55 & affstart < 19890401
replace strat2 = 1 if (reltype == 753 | reltype == 754) & age < 55 & affstart >= 20120714
replace strat2 = 1 if (reltype == 753 | reltype == 754) & age > 65 & age != .
replace strat2 = 0 if (reltype == 753 | reltype == 754) & strat2 == .

**************
* STRATEGY 3 *
**************
* The 753/754 affiliation episode lasts less than two months (61 days or
* fewer); the flag is switched off for ages 52+ on spells starting before
* 20120714 and for ages 55+.
gen strat3 = 1 if (reltype == 753 | reltype == 754) & length <= 61
replace strat3 = 0 if (reltype == 753 | reltype == 754) & strat3 == .
replace strat3 = 0 if  (reltype == 753 | reltype == 754) & ( (age>=52 & affstart<20120714) | age>=55)

* store temp file
save "$data/temp/fijosd_spells.dta", replace

**************
* STRATEGY 4 *
**************
* The contribution-base table contains records with contribution bases only
* during the 60 days (or two months) following the start date shown in the
* affiliation record with code 753 or 754.
* Implemented by counting, for each 753/754 spell, the calendar months between
* its start and end month that have a positive contribution base.

* year and month of the start/end of each affiliation episode (from YYYYMMDD)
gen yaffs = floor(affstart/10000)
gen maffs = mod(floor(affstart/100), 100)
gen yaffe = floor(affend/10000)
gen maffe = mod(floor(affend/100), 100)

* keep only potential fijod spells
keep if (reltype == 753 | reltype == 754)
keep id age affstart affend yaffs yaffe maffs maffe
compress
save "$data/temp/rel753.dta", replace

* list of individuals having at least one such spell
egen tagid = tag(id)
keep if tagid == 1
keep id
sort id
compress
save "$data/temp/rel753_id.dta", replace

* restrict to potential subsidy recipients only (reltype 753/754)
use "$data/mcvl2004-17_03cont_rest_age_un.dta", clear
merge m:1 id using "$data/temp/rel753_id.dta", keepusing(id)
keep if _merge == 3
drop _merge
drop if contyear <= 1983 & contyear != .

* total contribution base per person, year and month, then one row per
* person-year and one column per month-year
local months jan feb mar apr may jun jul aug sep oct nov dec
foreach month of local months {
	bysort id contyear: egen `month' = sum(cotb`month')
}
egen tagidyear = tag(id contyear)
keep if tagidyear == 1
drop ccc cotbtot tagidyear
drop cotb*
reshape wide `months', i(id) j(contyear)

* add affiliation info
merge 1:m id using "$data/temp/rel753.dta"
keep if _merge == 3
drop _merge

* generate date in stata ym format
gen start_ym = ym(yaffs,maffs)
gen end_ym = ym(yaffe,maffe)
format start_ym %tm
format end_ym %tm

* name each monthly column after its Stata monthly date: cont288 is January
* 1984, cont695 is December 2017. A direct rename replaces the former
* intermediate copies and the month x year x month loop of replace statements,
* which produced the same columns.
local firstmonth = ym(1984, 1)
local lastmonth = ym(2017, 12)
forvalues year = 1984/2017 {
	forvalues m = 1/12 {
		local mname : word `m' of `months'
		local i = ym(`year', `m')
		rename `mname'`year' cont`i'
	}
}

* length of the spell (in months)
gen length_ym = end_ym - start_ym

* months with a positive contribution base between start and end month
gen contmonths = 0
forvalues i = `firstmonth'/`lastmonth' {
	replace contmonths = contmonths + 1 if start_ym <= `i' & end_ym >= `i' & cont`i' > 0 & cont`i' < .
}

* identify fijos discontinuos based on strategy 4: at most two contributed
* months, or three when the spell spans at most three months
gen strat4 = 1 if contmonths <= 2 & contmonths != .
replace strat4 = 1 if contmonths == 3 & length_ym <= 3
replace strat4 = 0 if strat4 == .

* store data
keep id affstart affend strat4
duplicates drop id affstart affend, force
sort id affstart affend
save "$data/temp/fijosd_spells_strat4.dta", replace

* put together: spell-level file with strategies 1-3 plus strategy 4
use "$data/temp/fijosd_spells.dta", clear
merge m:1 id affstart affend using "$data/temp/fijosd_spells_strat4.dta"
drop _merge

* a 753/754 spell is a "fijo discontinuo" spell when any strategy flags it
gen fijod_spell = 1 if inlist(1, strat1, strat2, strat3, strat4)
replace fijod_spell = 0 if (reltype == 753 | reltype == 754) & fijod_spell == .

* variable and value labels
forvalues k = 1/4 {
	label var strat`k' "spell from fijo discontinuo worker by strategy `k'"
}
label var fijod_spell "spell from fijo discontinuo worker by any strategy"
label def fijod 1 "yes" 0 "no"
foreach v of varlist strat1 strat2 strat3 strat4 fijod_spell {
	label val `v' fijod
}

* store: one row per 753/754 spell
keep if (reltype == 753 | reltype == 754)
keep id affstart affend strat* fijod_spell
duplicates drop id affstart affend, force
sort id affstart affend
compress
save "$data/int/fijosd_spells.dta", replace

* erase temporary files
foreach f in rel753 rel753_id fijosd_spells fijosd_spells_strat4 spellgaps {
	cap erase "$data/temp/`f'.dta"
}

}
********************************************************************************
* industry														   
********************************************************************************
{
* Builds "$data/int/industry_spell.dta": one row per (id, affstart, affend)
* with the industry code grouped into 21 sections (A to U).

* data: when a spell has several rows, the one with the highest industry code
* is placed first and therefore kept
use "$data/mcvl2004-17_02affi_rest_age_un.dta", clear
gsort id affstart affend -industry
duplicates drop id affstart affend, force
keep id affstart affend industry

* add industry information
* each value is the lower bound of a section; the sections are contiguous, so
* a code ends up in the last section whose lower bound it reaches. Codes
* below 1 and missing codes stay missing.
local lowerbounds 1 33 101 351 360 411 451 491 551 581 641 681 691 771 841 851 861 900 941 970 990
gen industry_class = .
local k = 0
foreach lb of local lowerbounds {
	local ++k
	replace industry_class = `k' if industry >= `lb' & industry < .
}

* value labels. Sections 8 and 9 were swapped before: codes 491-550 are
* transportation and storage (H), codes 551-580 accommodation and food (I).
label define industry_class 1 "A Agriculture, hunting and forestry" ///
	2 "B Mining and quarrying" 3 "C Manufacturing" 4 "D Electricity, gas, steam and air conditioning supply" ///
	5 "E Water supply, sewerage, waste management and remediation activities" 6 "F Construction" ///
	7 "G Wholesale and retail trade; repair of motor vehicles and motorcycles" ///
	8 "H Transportation and storage" 9 "I Accommodation and food service activities" ///
	10 "J Information and communication" 11 "K Financial and insurance activities" ///
	12 "L Real estate activities" 13 "M Professional, scientific and technical activities" ///
	14 "N Administrative and support service activities" 15 "O Public administration and defence; compulsory social security" ///
	16 "P  Education" 17 " Q Human health and social work activities" 18 "R Arts, entertainment and recreation" ///
	19 "S Other service activities" 20 "T Activities of households as employers" 21 "U Activities of extraterritorial organisations and bodies"

label var industry_class "industry"
label values industry_class industry_class 

* the grouped variable takes over the name of the raw code
drop industry
rename industry_class industry
drop if industry == .

* store
sort id affstart
compress
save "$data/int/industry_spell.dta", replace

} 
********************************************************************************
* pensions 												   
********************************************************************************
{

* data
use "$data/mcvl2004-17_04pens_rest_age_un.dta", clear
keep id year pensionid pensiontype pendate penstatdate penstatus retstat rpartcoef retdate

* pendate and retdate in stata format: characters 1-4 of the raw value give the
* year, characters 5-6 the month, and the day is set to 1
foreach v in pendate retdate {
	tostring `v', g(`v'_txt)
	gen `v'_td = mdy(real(substr(`v'_txt, 5, 2)), 1, real(substr(`v'_txt, 1, 4)))
	drop `v' `v'_txt
	rename `v'_td `v'
}

* disability date: for pension types 1 to 19, pendate and, failing that, retdate
gen disdate = cond(missing(pendate), retdate, pendate) if pensiontype >= 1 & pensiontype < 20

* keep the latest retirement and disability date of each individual
foreach v in retdate disdate {
	bysort id: egen `v'_max = max(`v')
	drop `v'
	rename `v'_max `v'
}

* one row per individual
duplicates drop id, force

* store
keep id retdate disdate
format retdate disdate %td
sort id
compress
save "$data/int/pensions.dta", replace

}
********************************************************************************
* cohabitants																	   
********************************************************************************
{
* Builds "$data/int/n_cohabitants.dta": number of cohabitants per individual
* and year (2005-2017), counted as the number of filled birthc<i>_<year>
* fields (i = 1..10).

* load data ("use" takes the option clear; the former "replace" is not a
* valid option of this command)
use "$data/mcvl2004-17_05coha_rest_age_un.dta", clear

duplicates drop id, force

forvalues year = 2017(-1)2005 {

	g cohab`year' = 0

	forvalues i = 1/10 {

		* missing() is true for an empty string and for a numeric missing,
		* so the field counts only when it holds a value whatever its storage
		* type. The former tostring step turned numeric missings into "."
		* and counted them as cohabitants.
		replace cohab`year' = cohab`year' + 1 if !missing(birthc`i'_`year')

	}
}

keep id cohab*

* reshape to one row per individual and year
reshape long cohab, i(id) j(year)

* label
la var cohab "cohabitants"
la var year "year"

* store data
compress
sort id year
save "$data/int/n_cohabitants.dta", replace
}
********************************************************************************
* unemployment rates																   
********************************************************************************
{

* data: distinct spells by relation type
use "$data/mcvl2004-17_02affi.dta", clear
keep id affstart affend reltype 
duplicates drop id affstart affend reltype, force

* time variables: year, daily date and quarter of spell start and end, with
* month and day taken from the YYYYMMDD values by integer arithmetic
g ystart = floor(affstart/10000)
g yend = floor(affend/10000)
gen start = mdy(mod(floor(affstart/100), 100), mod(affstart, 100), ystart)
gen end = mdy(mod(floor(affend/100), 100), mod(affend, 100), yend)
format start end %td
g qstart = qofd(start)
g qend = qofd(end)
format qstart qend %tq

* residence codes of the individuals; unmatched rows are discarded
merge m:1 id using "$data/mcvl2004-17_01pers.dta", keepusing(residence*) keep(match) nogenerate

* province = residence code divided by 1000, rounded down; 0 is set to missing
forvalues year = 2017(-1)2004 {
	g prov`year' = floor(residence`year'/1000) if floor(residence`year'/1000) != 0
	label var prov`year' "province of residence in `year'"
	label val prov`year' province
}

* unemployment identifier (reltype 751 to 756)
g u = inrange(reltype, 751, 756)

** unemployment rates
* calculate unemployment rate based on employment status in week before quarter
* methodology as in EPA
* quarters
	* Q1: Dec 25 (prev year) - Jan 1
	* Q2: Mar 25 - Apr 1
	* Q3: Jun 25 - Jul 1
	* Q4: Sep 25 - Oct 1
* spells that ended before the first reference week are not needed
drop if end < mdy(12, 25, 2003)

* locals: range of years and quarters covered
local firstyear = 2004
local lastyear = 2017
local firstq = 1
local lastq = 4

* generate urate: one small file per year and quarter
forvalues year = `lastyear'(-1)`firstyear' {
	forvalues quarter = `firstq'/`lastq' {
		* reference window: from the 25th of the month before the quarter
		* starts to the 1st day of the quarter
		local qmonth = 3*(`quarter' - 1) + 1
		local win_end = mdy(`qmonth', 1, `year')
		if `quarter' == 1 {
			local win_start = mdy(12, 25, `year' - 1)
		}
		else {
			local win_start = mdy(`qmonth' - 1, 25, `year')
		}

		preserve
		* (1)-(2) keep the spells that touch the window and have a residence code
		keep if end >= `win_start' & start <= `win_end'
		drop if (residence`year' == 0 | residence`year' == .)
		* (3) share of unemployment spells by province of residence
		bysort prov`year': egen urate_prov`year'q`quarter' = mean(u)
		* (4) one row per province, stored as a temporary dataset
		keep prov`year' urate*
		bysort prov`year': keep if _n == 1
		rename prov`year' prov
		rename urate_prov`year'q`quarter' urate_prov
		g year = `year'
		g quarter = `quarter'
		sort prov urate*
		order year quarter prov urate*
		save "$data/temp/urate`year'q`quarter'.dta", replace
		restore
	}
}

* append the year-quarter files to the data in memory and remove them
forvalues year = `lastyear'(-1)`firstyear' {
	forvalues quarter = `firstq'/`lastq' {
		append using "$data/temp/urate`year'q`quarter'.dta"
		cap erase "$data/temp/urate`year'q`quarter'.dta"
	}
}

* label
label var urate_prov "quarterly unemployment rate"

* only the appended year-quarter-province rows carry a year
keep year quarter prov urate_prov
drop if year == .

* var in urate: relative change with respect to the previous quarter of the
* same province; missing for the first quarter of a province and for rows
* without a province
sort prov year quarter
by prov: gen var_urate_prov = (urate_prov - urate_prov[_n-1])/urate_prov[_n-1] if _n > 1 & !missing(prov)
label var var_urate_prov "variation in quarterly unemployment"

* store
sort year quarter
order year quarter prov urate* 
compress
save "$data/int/urates.dta", replace

}
********************************************************************************
* closing																	   
********************************************************************************
{
* close the log if one is open and empty the data in memory
capture log close
clear
}
